****************************************************
* Date: 				21 May 2025
* Paper title: 			Betas distribution and ETF tracking error
* Program's purpose: 	Replicate figures
	
	* Session setup: suppress the -more- pager and start from an empty dataset.
	set more off
	clear

	* Reproducibility: fix the random-number seed (used by the bootstrap
	* replications further down) and the seed Stata uses to order tied
	* observations when sorting.
	set seed     13579
	set sortseed 13579

	* All input files are read relative to this folder.
	* NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
	cd "C:\Paper_Betas\Data_Harvard_Dataverse"

	
**********************************************
********  F I G U R E  1
	* Figure 1: line plot of stdev (St.Dev. of index constituents' betas) over time.
	* Load the ETF dataset, replacing whatever is currently in memory
		import delimited using "Dataset_for_ETFs.csv", clear
	* Only the date fields and stdev are needed. Because stdev is the same for
	* every ETF within a month, collapsing exact duplicate rows leaves one
	* observation per month.
		keep year month time stdev
		duplicates drop
	* Build a fractional-year variable for the x-axis.
	* NOTE(review): assuming month is coded 1-12, month/12 places each point at
	* the END of its month (December maps to the next integer year) -- confirm
	* this is the intended alignment.
		generate YEAR = year + (month/12)
		sort YEAR
	* Create Figure 1 (x-axis labelled every two years from 2014 to 2024)
		twoway line stdev YEAR, sort xlabel(2014(2)2024)
	* End of Figure 1

	
**********************************************
********  F I G U R E  2
	* Figure 2: quantile-regression coefficient of stdev on ETF tracking error,
	* plotted across quantiles.
	* Open dataset (reloaded from disk, so nothing from the Figure 1 steps carries over)
		clear
		import delimited using "Dataset_for_ETFs.csv"
	* Winsorize extreme values of tracking error at the 1st and 99th percentiles,
	* computed separately within each ETF (etf_id). Values beyond a bound are set
	* to that bound; no observations are dropped.
		foreach var of varlist tra_error_etf {
			* Remove helper variables left over from an earlier run, if any
			capture drop UPPER LOWER
			* Per-ETF 99th and 1st percentiles of the variable
			bysort etf_id: egen UPPER=pctile(`var'), p(99) 
			bysort etf_id: egen LOWER=pctile(`var'), p(1) 
			* The `var'~=. guard leaves missing values untouched: Stata treats
			* missing as larger than any number, so it would otherwise satisfy
			* `var'>UPPER.
			replace `var'=UPPER 	if `var'>UPPER & `var'~=.
			replace `var'=LOWER 	if `var'<LOWER & `var'~=.
			* Helper variables are no longer needed
			capture drop UPPER LOWER
			}
	* Perform median regression of tracking error on stdev, controlling for
	* abs_return_sp500, with bootstrapped standard errors (250 replications).
	* No quantile() option is given, so bsqreg uses its default, the median.
	* NOTE(review): an earlier comment described this as a regression "with fixed
	* effects", but the command below includes no fixed-effects terms -- confirm
	* which is intended.
		bsqreg tra_error_etf stdev abs_return_sp500, reps(250)
	* Create Figure 2: coefficient on stdev across quantiles 10 to 90 in steps of 5,
	* with confidence intervals (250 bootstrap replications, seed 13579).
	* NOTE(review): grqreg is not part of official Stata; it is presumably the
	* community-contributed SSC package and must be installed beforehand -- confirm.
		grqreg stdev, qmin(10) qmax(90) qstep(5) ci reps(250) seed(13579)
	* End of Figure 2	
		